************************************************************************************
* This file builds vars on enrollment by institution characteristics from the IPEDS*
************************************************************************************

* Load fall enrollment totals and attach location and institution-level controls.
use "$clean_data_education/fallEnrollmentTotalsReshaped.dta", clear

* Locations: keep(3) retains matched observations only, so enrollment records
* without location info (master-only, e.g. some 1990 observations) are removed.
merge 1:m unitid yearGroup                                  ///
	using "$clean_data_education/locations.dta",            ///
	keep(3) nogenerate

* Institution controls: keep(1 3) also retains enrollment records that have no
* match in the controls file (the original author expected some from Alaska and co).
merge m:1 unitid                                            ///
	using "$clean_data_education/institutionControls.dta",  ///
	keepusing(forProfit public iclevel freshmenFullTimeAvg studentsAvg sector) ///
	keep(1 3) nogenerate

* Sample restrictions. Each step is written as a keep so that the condition
* describes the retained sample; missing values compare as larger than any
* number in Stata, so observations with missing forProfit, freshmenFullTimeAvg
* or periods are retained by the corresponding step.

	* exclude institutions flagged nonMainland and 1990 observations without a czone
	* NOTE(review): an earlier comment referred to 1991 here, the condition tests year == 1990 - confirm
	keep if nonMainland != 1 & !(year == 1990 & mi(czone))
	* exclude the 1990 year group
	keep if yearGroup != 1990
	* exclude for-profit institutions
	keep if forProfit != 1
	* exclude institutions with freshmenFullTimeAvg below 50
	keep if freshmenFullTimeAvg >= 50
	* exclude institutions with periods below 3
	keep if periods >= 3
	
*egen total11 = rsum(total3 total12), missing
*egen women11 = rsum(women3 women12), missing
*egen men11 = rsum(men3 men12), missing

*** aggregation by type of institution
* For each sex, split the enrollment count (women3 for f, men3 for m) into three
* variables by sector range. Each variable equals the count when the institution
* falls in the range and 0 otherwise (including when sector is missing):
*   und4Y_  : sector 1-3
*   und2Y_  : sector 4-6
*   undL2Y_ : sector 7-9
* NOTE(review): presumably 4-year, 2-year and less-than-2-year institutions in the
* IPEDS sector coding - confirm against the codebook.
foreach sex in f m {
	local count = cond("`sex'" == "f", "women3", "men3")
	gen und4Y_`sex'  = cond(inrange(sector, 1, 3), `count', 0)
	gen und2Y_`sex'  = cond(inrange(sector, 4, 6), `count', 0)
	gen undL2Y_`sex' = cond(inrange(sector, 7, 9), `count', 0)
}

	
*** weighting
* Scale every enrollment count (the und* splits and the totals by sex) by ourRatio.
* NOTE(review): ourRatio comes from one of the merged files; presumably it is the
* share of the institution allocated to the commuting zone - confirm.
foreach enr of varlist und* women3 men3 {
	replace `enr' = ourRatio * `enr'
}

** collapse to obtain aggregated values at CZ level
* collapse does not carry the original labels over to the summarized variables,
* so every label is stored in a local macro (l<varname>) beforehand and
* re-applied afterwards. Variables without a label get their own name as label.
* Compound double quotes are used throughout so that labels containing a
* double-quote character are stored and restored without a syntax error.
foreach v of varlist * {
	local l`v' : variable label `v'
	if `"`l`v''"' == "" {
		local l`v' `"`v'"'
	}
}

* One row per commuting zone and year group: identifiers take the last value
* within the cell, enrollment counts are summed.
collapse (last) statefip region division name_czone (sum) women3 men3 und*, by(czone yearGroup)

foreach v of varlist * {
	label variable `v' `"`l`v''"'
}

**** Ratios and delta
* Total population by commuting zone and year group. keep(2 3) retains matched
* rows and rows that exist only in the population file.
merge 1:1 czone yearGroup                                             ///
	using "$clean_data_lmarket/census_populationDataCZ.dta",          ///
	keepusing(ipums_pop) keep(2 3) nogenerate

* Population by sex for 1990, 2000 and 2008, one file per year, attached to every
* row of the commuting zone. keep(3) retains only zones present in each file.
merge m:1 czone                                                       ///
	using "$clean_data_lmarket/czone1990_school_demographics.dta",    ///
	keepusing(ipums_pop_m_1990 ipums_pop_f_1990) keep(3) nogenerate
merge m:1 czone                                                       ///
	using "$clean_data_lmarket/czone2000_school_demographics.dta",    ///
	keepusing(ipums_pop_m_2000 ipums_pop_f_2000) keep(3) nogenerate
merge m:1 czone                                                       ///
	using "$clean_data_lmarket/czone2008_school_demographics.dta",    ///
	keepusing(ipums_pop_m_2008 ipums_pop_f_2008) keep(3) nogenerate


* Pick the sex-specific population that serves as denominator for each year group:
*   yearGroup 2000 -> 2000 population
*   yearGroup 2007 -> 2008 population
*   any other yearGroup (1994, 2014, ...) -> 1990 population
* NOTE(review): yearGroup 2014 therefore uses the 1990 population, while the 2007
* ratio it is differenced against uses 2008 - confirm this is intended.
foreach sex in m f {
	gen ipums_pop_`sex' = cond(yearGroup == 2000, ipums_pop_`sex'_2000, ///
	                      cond(yearGroup == 2007, ipums_pop_`sex'_2008, ///
	                                              ipums_pop_`sex'_1990))
}

*  ratios
* Step 1: enrollment relative to the total CZ population, stored in new
* variables with suffix _pop. This must run before step 2, which overwrites
* the counts it reads.
foreach enr of varlist women3 men3 und* {
	generate `enr'_pop = `enr' / ipums_pop
}

* Step 2: enrollment relative to the population of the same sex, overwriting
* the counts in place.
replace women3 = women3 / ipums_pop_f
replace men3   = men3   / ipums_pop_m
foreach type in 4Y 2Y L2Y {
	foreach sex in m f {
		replace und`type'_`sex' = und`type'_`sex' / ipums_pop_`sex'
	}
}

** keep only the variables for which you built the ratios
* und* also matches the und*_pop variables created in step 1.
* NOTE(review): women3_pop and men3_pop are not in this list and are dropped
* here - confirm whether they were meant to be kept.
keep czone yearGroup statefip region division name_czone women3 men3 und*

* delta
sort czone yearGroup

* Consecutive period index (1994 -> 1, 2000 -> 2, 2007 -> 3, 2014 -> 4) used as
* panel time variable, so that the lag operator refers to the previous year
* group. Any other yearGroup keeps a missing index.
gen d = .
	replace d = 1 if yearGroup == 1994
	replace d = 2 if yearGroup == 2000
	replace d = 3 if yearGroup == 2007
	replace d = 4 if yearGroup == 2014

	* Beginning-of-period ratios, keyed by the same year value that is assigned
	* to the delta rows further down, so that each delta can be matched with the
	* ratio at the start of its period on the same row. They are stored in a
	* Stata tempfile, which is removed automatically when the do-file ends, so
	* no scratch file is written to the output folder.
	preserve
	gen year = 1990 if yearGroup == 1994
	replace year = 2000 if yearGroup == 2000
	replace year = 2008 if yearGroup == 2007
	drop yearGroup d
	drop if mi(year)
	tempfile startLevels
	save "`startLevels'"
	restore

***
* Remember the labels of the ratio variables so the deltas can reuse them
* (variables without a label get their own name).
foreach v of varlist und* women3 men3 {
	local l`v' : variable label `v'
	if `"`l`v''"' == "" {
		local l`v' `"`v'"'
	}
}

* Change in each ratio between consecutive periods within a commuting zone,
* multiplied by 100.
xtset czone d
foreach v of varlist und* women3 men3 {
	gen dEnr_`v' = (`v' - L.`v') * 100
	label variable dEnr_`v' `"`l`v''"'
}
***

xtset, clear

* Remove ALL ratio levels from the delta rows before merging. merge keeps the
* master copy of any variable present in both datasets, so leaving women3 and
* men3 here would keep their end-of-period values while und* would come from
* the beginning-of-period file; dropping them makes every level in the final
* data refer to the beginning of the period.
drop d und* women3 men3

* generate time variable needed by Ben's code
* Each delta row is stamped with the year of the start of its period.
gen year = 1990 if yearGroup == 2000
	replace year = 2000 if yearGroup == 2007
	replace year = 2008 if yearGroup == 2014
	drop if mi(year)

* keep(3): rows found only in the start-of-period file have a single ratio and
* therefore no delta; rows found only in the delta data have no starting ratio.
merge 1:1 czone year using "`startLevels'", keep(3) nogenerate

save "$final_data_outcomes/IPEDS_EnrollmentCZ_gender.dta", replace
